{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "832cbc95",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-12T21:24:16.239591Z",
     "iopub.status.busy": "2022-05-12T21:24:16.239187Z",
     "iopub.status.idle": "2022-05-12T21:24:17.738312Z",
     "shell.execute_reply": "2022-05-12T21:24:17.737324Z"
    },
    "papermill": {
     "duration": 1.515163,
     "end_time": "2022-05-12T21:24:17.741395",
     "exception": false,
     "start_time": "2022-05-12T21:24:16.226232",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import numpy as np \n",
    "import pandas as pd\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.model_selection import cross_val_predict\n",
    "from sklearn.metrics import roc_auc_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "feccabad",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-12T21:24:17.766209Z",
     "iopub.status.busy": "2022-05-12T21:24:17.765879Z",
     "iopub.status.idle": "2022-05-12T21:24:21.083665Z",
     "shell.execute_reply": "2022-05-12T21:24:21.082746Z"
    },
    "papermill": {
     "duration": 3.332449,
     "end_time": "2022-05-12T21:24:21.086361",
     "exception": false,
     "start_time": "2022-05-12T21:24:17.753912",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# A copy of the data to download can be found at:\n",
    "# https://www.kaggle.com/datasets/lucamassaron/tabular-playground-series-jan-2021\n",
    "train = pd.read_csv(\"../input/tabular-playground-series-jan-2021/train.csv\")\n",
    "test = pd.read_csv(\"../input/tabular-playground-series-jan-2021/test.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "e4a08b03",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-12T21:24:21.109156Z",
     "iopub.status.busy": "2022-05-12T21:24:21.108793Z",
     "iopub.status.idle": "2022-05-12T21:24:21.188984Z",
     "shell.execute_reply": "2022-05-12T21:24:21.187969Z"
    },
    "papermill": {
     "duration": 0.095271,
     "end_time": "2022-05-12T21:24:21.192002",
     "exception": false,
     "start_time": "2022-05-12T21:24:21.096731",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "train = train.fillna(-1).drop([\"id\", \"target\"], axis=1)\n",
    "test = test.fillna(-1).drop([\"id\"], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "b9263a7e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-12T21:24:21.214042Z",
     "iopub.status.busy": "2022-05-12T21:24:21.213574Z",
     "iopub.status.idle": "2022-05-12T21:24:21.245962Z",
     "shell.execute_reply": "2022-05-12T21:24:21.244879Z"
    },
    "papermill": {
     "duration": 0.046391,
     "end_time": "2022-05-12T21:24:21.248658",
     "exception": false,
     "start_time": "2022-05-12T21:24:21.202267",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "X = pd.concat([train, test], ignore_index=True)\n",
    "y = [0] * len(train) + [1] * len(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "7e1cf7e6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-12T21:24:21.270675Z",
     "iopub.status.busy": "2022-05-12T21:24:21.270308Z",
     "iopub.status.idle": "2022-05-12T21:54:18.335472Z",
     "shell.execute_reply": "2022-05-12T21:54:18.333817Z"
    },
    "papermill": {
     "duration": 1797.082831,
     "end_time": "2022-05-12T21:54:18.341861",
     "exception": false,
     "start_time": "2022-05-12T21:24:21.259030",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "model = RandomForestClassifier(random_state=0)\n",
    "cv_preds = cross_val_predict(model, X, y, cv=5, n_jobs=-1, method='predict_proba')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "dc5c98a6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-12T21:54:18.370805Z",
     "iopub.status.busy": "2022-05-12T21:54:18.369439Z",
     "iopub.status.idle": "2022-05-12T21:54:18.771394Z",
     "shell.execute_reply": "2022-05-12T21:54:18.770344Z"
    },
    "papermill": {
     "duration": 0.420268,
     "end_time": "2022-05-12T21:54:18.776677",
     "exception": false,
     "start_time": "2022-05-12T21:54:18.356409",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.49981959930833336\n"
     ]
    }
   ],
   "source": [
    "print(roc_auc_score(y_true=y, y_score=cv_preds[:,1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "371b4b6f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-12T21:54:18.805600Z",
     "iopub.status.busy": "2022-05-12T21:54:18.805239Z",
     "iopub.status.idle": "2022-05-12T21:54:18.813136Z",
     "shell.execute_reply": "2022-05-12T21:54:18.812068Z"
    },
    "papermill": {
     "duration": 0.023365,
     "end_time": "2022-05-12T21:54:18.816362",
     "exception": false,
     "start_time": "2022-05-12T21:54:18.792997",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "24793\n"
     ]
    }
   ],
   "source": [
    "print(np.sum(cv_preds[:len(X), 1] > 0.5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "4b165023",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-12T21:54:18.849718Z",
     "iopub.status.busy": "2022-05-12T21:54:18.849383Z",
     "iopub.status.idle": "2022-05-12T22:12:55.247629Z",
     "shell.execute_reply": "2022-05-12T22:12:55.246399Z"
    },
    "papermill": {
     "duration": 1116.431812,
     "end_time": "2022-05-12T22:12:55.265251",
     "exception": false,
     "start_time": "2022-05-12T21:54:18.833439",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomForestClassifier()"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "9a6872a9",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-12T22:12:55.293966Z",
     "iopub.status.busy": "2022-05-12T22:12:55.293295Z",
     "iopub.status.idle": "2022-05-12T22:12:55.812658Z",
     "shell.execute_reply": "2022-05-12T22:12:55.811391Z"
    },
    "papermill": {
     "duration": 0.537481,
     "end_time": "2022-05-12T22:12:55.815373",
     "exception": false,
     "start_time": "2022-05-12T22:12:55.277892",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cont14     : 0.0724\n",
      "cont4      : 0.0718\n",
      "cont2      : 0.0718\n",
      "cont7      : 0.0718\n",
      "cont5      : 0.0717\n",
      "cont8      : 0.0716\n",
      "cont3      : 0.0715\n",
      "cont13     : 0.0712\n",
      "cont12     : 0.0712\n",
      "cont1      : 0.0712\n",
      "cont10     : 0.0711\n",
      "cont9      : 0.0711\n",
      "cont11     : 0.0710\n",
      "cont6      : 0.0708\n"
     ]
    }
   ],
   "source": [
    "ranks = sorted(list(zip(X.columns, model.feature_importances_)), \n",
    "               key=lambda x: x[1], reverse=True)\n",
    "\n",
    "for feature, score in ranks:\n",
    "    print(f\"{feature:10} : {score:0.4f}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 2933.329012,
   "end_time": "2022-05-12T22:12:58.563839",
   "environment_variables": {},
   "exception": null,
   "input_path": "__notebook__.ipynb",
   "output_path": "__notebook__.ipynb",
   "parameters": {},
   "start_time": "2022-05-12T21:24:05.234827",
   "version": "2.3.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
